function varargout = hcp2blocks(restrfile,blocksfile,dz2sib,ids,showreport)
  % Takes a "restricted data" CSV file from the HCP and generates
  % a block file that can be used to make permutations in PALM.
  %
  % Usage:
  % [EB,tabout] = hcp2blocks(restrfile,blocksfile,dz2sib,ids)
  %
  % Inputs:
  % restrfile  : CSV file downloaded from https://db.humanconnectome.org/
  %              containing the "Restricted Data"
  %              For some releases, eg HCP500, some gawk processing is needed.
  % blocksfile : CSV file to be created, with the exchangeability blocks,
  %              ready for used with PALM.
  % dz2sib     : (Optional) Defines whether dizygotic twins should be
  %              treated as ordinary siblings (true), or be a category
  %              on its own (false). Default = false.
  % ids        : (Optional) A vector of subject IDs. If supplied, only the
  %              subjects with the indicated IDs will be used.
  %
  % Outputs (if requested):
  % EB      : Block definitions, ready for use, in the original order
  %           as in the CSV file.
  % tabout  : (Optional) A table containing:
  %           - 1st col: subject ID
  %           - 2nd col: mother ID
  %           - 3rd col: father ID
  %           - 4th col: sib type
  %           - 5th col: family ID
  %           - 6th col: family type
  %           - 7th col: subject's age
  %
  % Reference:
  % * Winkler AM, Webster MA, Vidaurre D, Nichols TE, Smith SM.
  %   Multi-level block permutation. Neuroimage. 2015;123:253-68.
  %
  % _____________________________________
  % Anderson M. Winkler
  % FMRIB / University of Oxford
  % Dec/2013 (first version)
  % Feb/2017 (this version)
  % http://brainder.org
  
  warning off backtrace
  
  % Load the data and select what is now needed
  tmp = strcsvread(restrfile);
  
  % If there is no Zygosity field, create it from ZygSR and ZygGT
  zygo_idx = find(strcmpi(tmp(1,:),'Zygosity'));
  if isempty(zygo_idx),
      zygoSR_idx = find(strcmpi(tmp(1,:),'ZygositySR'));
      zygoGT_idx = find(strcmpi(tmp(1,:),'ZygosityGT'));
      tmp(:,end+1) = cell(size(tmp,1),1);
      tmp{1,end} = 'Zygosity';
      for s = 2:size(tmp,1),
          if  (numel(tmp{s,zygoGT_idx}) == 1 && isnan(tmp{s,zygoGT_idx})) || ...
              (ischar(tmp{s,zygoGT_idx}) && strcmpi(tmp{s,zygoGT_idx},' ')) || ...
              isempty(tmp{s,zygoGT_idx}),
              tmp{s,end} = tmp{s,zygoSR_idx};
          else
              tmp{s,end} = tmp{s,zygoGT_idx};
          end
      end
  end
  
  % Locate the columns with the relevant pieces of info, i.e.,
  % egoid, moid, faid, twin status and zygozity, in this exact
  % order, and take just them
  egid_idx = find(strcmpi(tmp(1,:),'Subject'));
  moid_idx = find(strcmpi(tmp(1,:),'Mother ID')   | strcmpi(tmp(1,:),'Mother_ID'));
  faid_idx = find(strcmpi(tmp(1,:),'Father ID')   | strcmpi(tmp(1,:),'Father_ID'));
  zygo_idx = find(strcmpi(tmp(1,:),'Zygosity'));
  agey_idx = strcmpi(tmp(1,:),'Age_in_Yrs');
  tab = tmp(2:end,[egid_idx moid_idx faid_idx zygo_idx]);
  age = cell2mat(tmp(2:end,agey_idx));
  
  % If subjects have these elementary info missing, remove them
  tab0a =  cellfun(@isnan, tab(:,1:3));
  tab0b = ~cellfun(@ischar,tab(:,4));
  tab0  = any(horzcat(tab0a,tab0b),2);
  idstodel = cell2mat(tab(tab0,1));
  if numel(idstodel),
      warning(sprintf([ ...
          'These subjects have data missing in the restricted file and will be removed: \n' ...
          repmat('         %d\n',1,numel(idstodel))],idstodel));
  end
  if nargin >= 4 && ~ isempty(ids) && ~ isempty(idstodel),
      ids(any(bsxfun(@eq,ids(:),idstodel'),2)) = [];
  end
  tab(tab0,:) = [];
  age(tab0)   = [];
  N = size(tab,1);
  
  % Treat non-monozygotic twins as ordinary siblings.
  if nargin >= 3 && dz2sib,
      for n = 1:N,
          if any(strcmpi(tab{n,4},{'notmz','dz'})),
              tab{n,4} = 'NotTwin';
          end
      end
  end
  
  % Instead of strings, use sib identifiers. These are based on the
  % fact that it's unlikely any family on the HCP have more than 10
  % siblings overall. If this happens, it'll be necessary to make
  % changes here.
  sibtype = zeros(N,1);
  for n = 1:N,
      if strcmpi(tab{n,4},'nottwin'),
          sibtype(n) = 10;
      elseif any(strcmpi(tab{n,4},{'notmz','dz'})),
          sibtype(n) = 100;
      elseif strcmpi(tab{n,4},'mz'),
          sibtype(n) = 1000;
      end
  end
  tab = cell2mat(tab(:,1:3));
  
  % Subselect subjects as needed
  if nargin == 4 && ~isempty(ids) && islogical(ids(1)),
      tab        = tab(ids,:);
      sibtype    = sibtype(ids,:);
  elseif nargin == 4 && ~ isempty(ids),
      idx = bsxfun(@eq,tab(:,1),ids');
      idx = ~ any(idx,1);
      if any(idx),
          warning(sprintf([ ...
              'These subjects don''t exist in the restricted file and will be removed: \n' ...
              repmat('         %d\n',1,sum(idx))],ids(idx)));
      end
      ids(idx)   = [];
      tabnew     = zeros(length(ids),size(tab,2));
      sibtypenew = zeros(length(ids),1);
      agenew     = zeros(length(ids),1);
      for n = 1:length(ids),
          idx = tab(:,1) == ids(n);
          tabnew(n,:)     = tab(idx,:);
          sibtypenew(n,:) = sibtype(idx);
          agenew(n,:)     = age(idx);
      end
      tab        = tabnew;
      sibtype    = sibtypenew;
      age        = agenew;
  end
  N = size(tab,1);
  
  % Create family IDs
  famid = zeros(N,1);
  U = unique(tab(:,2:3),'rows');
  for u = 1:size(U,1),
      uidx = all(bsxfun(@eq,tab(:,2:3),U(u,:)),2);
      famid(uidx) = u;
  end
  
  % For parents that belong to more than one family, merge
  % their families into just one, the one with lowest famid.
  par = tab(:,2:3);
  for p = par(:)', % for each parent
      pidx = any(par == p,2);
      famids = unique(famid(pidx)); % families that he/she belong to
      for f = 1:numel(famids),
          famid(famid == famids(f)) = famids(1);
      end
  end
  
  % Label each family according to their type. The "type" is
  % determined by the number and type of siblings.
  F = unique(famid);
  famtype = zeros(N,1);
  for f = 1:numel(F),
      fidx = F(f) == famid;
      famtype(fidx) = sum(sibtype(fidx)) + numel(unique(tab(fidx,2:3)));
  end
  
  % Twins which pair data isn't available should be treated as
  % non-twins, so fix and repeat computing the family types
  idx = (sibtype == 100  & (famtype >= 100  & famtype <= 199)) ...
      | (sibtype == 1000 & (famtype >= 1000 & famtype <= 1999));
  sibtype(idx) = 10;
  for f = 1:numel(F),
      fidx = F(f) == famid;
      famtype(fidx) = sum(sibtype(fidx)) + numel(unique(tab(fidx,2:3)));
  end
  
  % Append the new info to the table.
  tab = horzcat(tab,sibtype,famid,famtype);
  
  % Families of the same type can be shuffled, as well as sibs of the same
  % type. To do this, the simplest is to construct the blocks within each
  % family type, then replicate across the families of the same type.
  % Start by sorting
  [~,idx] = sortrows([famid sibtype age]);
  [~,idxback] = sort(idx);
  tab     = tab(idx,:);
  sibtype = sibtype(idx);
  famid   = famid(idx);
  famtype = famtype(idx);
  age     = age(idx,:);
  
  % Now make the blocks for each family
  B = cell(numel(F),1);
  for f = 1:numel(F),
      fidx = F(f) == famid;
      ft = famtype(find(fidx,1));
      if any(ft == [(12:10:92) 23 202 2002]),
          B{f} = horzcat(famid(fidx),sibtype(fidx),tab(fidx,1));
      else
          B{f} = horzcat(-famid(fidx),sibtype(fidx),tab(fidx,1));
          
          % Some particular cases of complicated families
          if ft == 33,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if     (sum(tabx(:,1) == tabx(s,1)) == 2 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 3) || ...
                          (sum(tabx(:,1) == tabx(s,1)) == 3 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 2),
                      B{f}(s,2) = B{f}(s,2) + 1;
                  end
              end
          elseif ft == 53,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if     (sum(tabx(:,1) == tabx(s,1)) == 3 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 5) || ...
                          (sum(tabx(:,1) == tabx(s,1)) == 5 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 3),
                      B{f}(s,2) = B{f}(s,2) + 1;
                  end
              end
          elseif ft == 234,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if     (sum(tabx(:,1) == tabx(s,1)) == 1 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 3) || ...
                          (sum(tabx(:,1) == tabx(s,1)) == 3 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 1),
                      B{f}(s,2) = B{f}(s,2) + 1;
                  end
              end
          elseif ft == 54,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if      sum(tabx(:,1) == tabx(s,1)) == 4 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 2,
                      B{f}(s,2) = B{f}(s,2) + 1;
                  elseif  sum(tabx(:,1) == tabx(s,1)) == 1 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 3,
                      B{f}(s,2) = B{f}(s,2) - 1;
                  end
              end
          elseif ft == 34,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if     (sum(tabx(:,1) == tabx(s,1)) == 2 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 2),
                      B{f}(s,2) = B{f}(s,2) + 1;
                      famtype(fidx) = 39;
                  end
              end
          elseif ft == 43,
              tabx = tab(fidx,2:3);
              k = 0;
              for s = 1:size(tabx,1),
                  if tabx(s,1) == tabx(1,1) && ...
                          tabx(s,2) == tabx(1,2),
                      B{f}(s,2) = B{f}(s,2) + 1;
                      k = k + 1;
                  end
              end
              if k == 2,
                  famtype(fidx) = 49;
                  B{f}(:,1) = -B{f}(:,1);
              end
          elseif ft == 44,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if      sum(tabx(:,1) == tabx(s,1)) == 4 && ...
                          sum(tabx(:,2) == tabx(s,2)) == 2,
                      B{f}(s,2) = B{f}(s,2) + 1;
                  end
              end
          elseif ft == 223,
              sibx = sibtype(fidx);
              B{f}(sibx == 10,2) = -B{f}(sibx == 10,2);
          elseif ft == 302,
              famtype(fidx) = 212;
              tmpage = age(fidx);
              if tmpage(1) == tmpage(2),
                  B{f}(3,2) = 10;
              elseif tmpage(1) == tmpage(3),
                  B{f}(2,2) = 10;
              elseif tmpage(2) == tmpage(3),
                  B{f}(1,2) = 10;
              end
          elseif ft == 313 || ft == 314,
              famtype(fidx) = ft - 100 + 10;
              if famtype(fidx) == 223,
                  famtype(fidx) = 229;
              end
              tmpage = age(fidx);
              didx = find(B{f}(:,2) == 100);
              if tmpage(didx(1)) == tmpage(didx(2)),
                  B{f}(didx(3),2) = 10;
              elseif tmpage(didx(1)) == tmpage(didx(3)),
                  B{f}(didx(2),2) = 10;
              elseif tmpage(didx(2)) == tmpage(didx(3)),
                  B{f}(didx(1),2) = 10;
              end
          elseif ft == 2023,
              tabx = tab(fidx,2:3);
              for s = 1:size(tabx,1),
                  if      sum(tabx(:,1) == tabx(s,1)) == 4 && ...
                         (sum(tabx(:,2) == tabx(s,2)) == 1 || ...
                          sum(tabx(:,2) == tabx(s,2)) == 3) ,
                      famtype(fidx) = 2029;
                      if B{f}(s,2) == 10,
                          B{f}(s,2) = -B{f}(s,2);
                      end
                  end
              end
          end
      end
  end
  
  % Concatenate all. Prepending the famtype ensures that the
  % families of the same type can be shuffled whole-block. Also,
  % add column with -1, for within-block at the outermost level
  B = horzcat(-ones(N,1),famtype,cell2mat(B));
  
  % Sort back to the original order
  B   = B(idxback,:);
  tab = tab(idxback,:);
  tab(:,6) = B(:,2);
  
  % Drop columns that are redundant (useful when the supplied ids
  % contain just a few subjects)
  for c = size(B,2):-1:2,
      if numel(unique(B(:,c))) == 1,
          B(:,c) = [];
      end
  end
  if nargout >= 1,
      varargout{1} = B;
  end
  if nargout >= 2,
      varargout{2} = [tab age];
  end
  
  % Save as CSV
  if nargin >= 2 && ~isempty(blocksfile) && ischar(blocksfile),
      dlmwrite(blocksfile,B,'precision','%d');
  end
  
  % Print a simplified report if requested
  if nargin >= 5 && showreport,
      fprintf('Family type,Count,Sibship size,Number of subjects,Abbreviated description\n');
      U = unique(B(:,2));
      for u = 1:size(U,1),
          switch U(u)
              case 12,   abbrv = '1 NS';
              case 22,   abbrv = '2 FS';
              case 32,   abbrv = '3 FS';
              case 33,   abbrv = '2 FS + 1 HS';
              case 34,   abbrv = '3 HS';
              case 39,   abbrv = '3 HS/NS';
              case 42,   abbrv = '4 FS';
              case 43,   abbrv = '3 FS + 1 HS';
              case 44,   abbrv = '2 FS + 2 HS';
              case 49,   abbrv = '2 FS/HS + 2 FS/HS';
              case 52,   abbrv = '5 FS';
              case 53,   abbrv = '3 FS/HS + 2 FS/HS';
              case 54,   abbrv = '2 FS/HS + 2 FS/HS + 1 HS/NS';
              case 202,  abbrv = '2 DZ';
              case 212,  abbrv = '2 DZ + 1 FS';
              case 213,  abbrv = '2 DZ + 1 HS';
              case 222,  abbrv = '2 DZ + 2 FS';
              case 223,  abbrv = '2 DZ + 1 FS + 1 HS';
              case 224,  abbrv = '2 DZ + 2 FS/HS';
              case 229,  abbrv = '2 DZ + 2 HS/HS';
              case 234,  abbrv = '2 DZ + 2 HS + 1 HS/NS';
              case 2002, abbrv = '2 MZ';
              case 2012, abbrv = '2 MZ + 1 FS';
              case 2013, abbrv = '2 MZ + 1 HS';
              case 2022, abbrv = '2 MZ + 2 FS';
              case 2023, abbrv = '2 MZ + 2 HS';
              case 2029, abbrv = '2 MZ + 1 FS + 1 HS';
              case 2032, abbrv = '2 MZ + 3 FS';
              case 2042, abbrv = '2 MZ + 4 FS';
          end
          nP(u) = numel(unique(B(B(:,2) == U(u),3)));
          nS(u) = sum(B(:,2) == U(u));
          fprintf('%d,%d,%d,%d,%s\n',U(u),nP(u),nS(u)/nP(u),nS(u),abbrv);
      end
  end
  
  warning on backtrace